Agenda

  1. Principal component analysis
  2. Topic modeling
  3. Group breakout sessions (if time)

Principal component analysis

What are the primary reasons to use PCA?

Algorithm

  1. Find linear combinations of the variables to create principal components
  2. Maintain as much variance as possible.
  3. Principal components are orthogonal (uncorrelated)

Setup

# Chunk options: show code, suppress package startup messages and warnings
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
library(tidytext)
data(stop_words)  # tidytext stop-word lexicon, removed via anti_join() below

# Wine reviews; rowid_to_column() adds an integer id so tokens can be
# re-joined to their source review after unnest_tokens()
wine <- read_rds("../resources/variety-project.rds") %>% rowid_to_column("id") 

Pull a bunch of words

library(tidytext)
data(stop_words)

# Build one row per review: 0/1 indicator columns for frequent words, plus
# the original metadata columns and log price.
wine <- wine %>%
  unnest_tokens(word, description) %>%
  # drop stop words and domain giveaways (grape names would leak the label)
  anti_join(stop_words, by = "word") %>%
  filter(!(word %in% c("drink", "vineyard", "variety", "price", "points",
                       "wine", "pinot", "chardonnay", "gris", "noir",
                       "riesling", "syrah"))) %>%
  count(id, word) %>%
  # keep only words that appear more than 500 times across the corpus
  group_by(word) %>%
  mutate(total = sum(n)) %>%
  filter(total > 500) %>%
  ungroup() %>%
  # count() only emits observed (id, word) pairs, so n >= 1 here and
  # exists is a presence flag; the grouping by id the original carried
  # was unnecessary for this row-wise mutate
  mutate(exists = if_else(n > 0, 1, 0)) %>%
  pivot_wider(id_cols = id, names_from = word,
              values_from = exists, values_fill = 0) %>%
  # right join restores reviews whose description kept no frequent words
  right_join(wine, by = "id") %>%
  replace(., is.na(.), 0) %>%
  mutate(log_price = log(price)) %>%
  select(-id, -price, -description)

names(wine)
##  [1] "flavors"   "tart"      "oak"       "finish"    "fruit"    
##  [6] "tannins"   "cherry"    "light"     "variety"   "points"   
## [11] "log_price"

Run the principal component analysis

# PCA on the numeric features (variety is the categorical label, excluded).
# Centering/scaling put the 0/1 indicators and log_price on equal footing.
# Note: prcomp()'s argument is `scale.` -- `scale = T` worked only through
# partial matching, and T/F are reassignable, so both are spelled out.
pr_wine <- prcomp(x = select(wine, -variety), center = TRUE, scale. = TRUE)
summary(pr_wine)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.3215 1.1071 1.0345 1.0073 0.98302 0.97868 0.96368
## Proportion of Variance 0.1746 0.1226 0.1070 0.1015 0.09663 0.09578 0.09287
## Cumulative Proportion  0.1746 0.2972 0.4042 0.5057 0.60231 0.69810 0.79096
##                           PC8     PC9    PC10
## Standard deviation     0.9375 0.88299 0.65704
## Proportion of Variance 0.0879 0.07797 0.04317
## Cumulative Proportion  0.8789 0.95683 1.00000

Show variance plot

screeplot(pr_wine, type = "lines")

Visualize biplots

# Biplot of the first two components (default choices = 1:2):
# observations plus variable-loading arrows
biplot(pr_wine)

# Same plot for components 3 and 4
biplot(pr_wine, choices = c(3,4))

Factor loadings

pr_wine$rotation
##                   PC1          PC2         PC3         PC4         PC5
## flavors   -0.09586279  0.389440836 -0.43248272  0.14235301  0.32198847
## tart      -0.15495828 -0.381109570  0.25754936  0.35333777 -0.23613301
## oak        0.10988655  0.048765487  0.56910333 -0.35368385  0.61677902
## finish     0.05538319 -0.031176652 -0.38160699 -0.72190787 -0.31458193
## fruit      0.14835040 -0.514950105  0.16337150 -0.24647009 -0.11100017
## tannins    0.11507476 -0.422731509 -0.34175797  0.31485067  0.20099267
## cherry     0.37745872 -0.299556237 -0.21965954 -0.06794250  0.11769187
## light     -0.22191639 -0.321285160 -0.28682880 -0.11415138  0.52598416
## points     0.58875678  0.250617819  0.03265683  0.09802766 -0.08867767
## log_price  0.61485717 -0.009917253 -0.04525738  0.14666361  0.10046362
##                  PC6         PC7         PC8         PC9         PC10
## flavors    0.2943509 -0.37547201 -0.53999015  0.09411969 -0.009713721
## tart      -0.3547888 -0.52638039 -0.36184664 -0.20054482 -0.098502625
## oak       -0.1417953  0.08153352 -0.32291222 -0.14125713 -0.071973147
## finish    -0.3761705 -0.04822750 -0.28703106 -0.06317100 -0.004062862
## fruit      0.5314431 -0.14102233 -0.17992002  0.52757732 -0.005623983
## tannins   -0.1936070  0.59748865 -0.35232888  0.08309068 -0.161902215
## cherry     0.3748396 -0.14922759  0.13142585 -0.71143854 -0.121991476
## light     -0.2772870 -0.34987118  0.45610413  0.23509155 -0.113892089
## points    -0.1678861 -0.15427722  0.09424389  0.24896518 -0.670196814
## log_price -0.2457200 -0.16616464  0.01122554  0.13542793  0.694078656

Scores: loadings times the (scaled) values for all rows

head(pr_wine$x)
##              PC1        PC2         PC3        PC4        PC5         PC6
## [1,] -2.51789057  0.4040814  0.41214587  1.5356567 -0.6488525 -0.35115520
## [2,] -0.05329592  0.6776983  0.41168677  0.6827924 -0.3233852 -0.66438711
## [3,] -1.59677498 -0.2485021  2.81897793  0.3752514  0.4786475 -1.50262229
## [4,] -0.15575115 -0.5264200 -0.09364021 -1.5356814 -1.2681434 -0.23305233
## [5,] -1.41723444  0.1552330 -1.36797242  1.5417984  0.7288760 -0.06114332
## [6,] -1.60016306  1.3850886 -0.38293175  0.6603520  0.1678791  0.44228922
##             PC7         PC8        PC9         PC10
## [1,] -1.0781328 -1.16506405 -1.0749213 -0.656687443
## [2,]  0.6757479  0.97921893 -0.2898472  1.710802572
## [3,] -0.2156086 -0.96692838 -1.5598540 -0.355556765
## [4,]  0.4221780 -0.06581793  0.4839961  1.581293171
## [5,]  2.0706983 -1.18788571 -0.2345163  0.004556595
## [6,]  0.2877224 -0.15801766 -0.4431403  0.648703584

Add variety label to principal components and name them

# Keep the first four components, attach the variety label, and give the
# components interpretable names based on their dominant loadings.
prc <- pr_wine$x %>%
  as.data.frame() %>%
  bind_cols(select(wine, variety), .) %>%
  select(1:5) %>%
  rename(quality = PC1, flavor = PC2, oak = PC3, tart = PC4)
head(prc)
variety quality flavor oak tart
Pinot_Gris -2.5178906 0.4040814 0.4121459 1.5356567
Pinot_Noir -0.0532959 0.6776983 0.4116868 0.6827924
Pinot_Noir -1.5967750 -0.2485021 2.8189779 0.3752514
Pinot_Noir -0.1557511 -0.5264200 -0.0936402 -1.5356814
Pinot_Noir -1.4172344 0.1552330 -1.3679724 1.5417984
Pinot_Noir -1.6001631 1.3850886 -0.3829317 0.6603520

Density of different components by variety (Factors 1&2)

# Score densities by variety for the first two (renamed) components
prc %>%
  select(variety, quality, flavor) %>%
  pivot_longer(cols = -variety, names_to = "component", values_to = "loading") %>%
  ggplot(aes(x = loading, fill = variety)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ component)

Density of different components by variety (Factors 3&4)

# Score densities by variety for components 3 and 4
prc %>%
  select(variety, oak, tart) %>%
  pivot_longer(cols = -variety, names_to = "component", values_to = "loading") %>%
  ggplot(aes(x = loading, fill = variety)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ component)

What if we add back more words (features)?

# Rebuild the indicator matrix with a lower frequency cutoff (> 100 instead
# of > 500), yielding many more word features.
wine <- read_rds("../resources/variety-project.rds") %>% rowid_to_column("id")
wine <- wine %>%
  unnest_tokens(word, description) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!(word %in% c("drink", "vineyard", "variety", "price", "points",
                       "wine", "pinot", "chardonnay", "gris", "noir",
                       "riesling", "syrah"))) %>%
  count(id, word) %>%
  group_by(word) %>%
  mutate(total = sum(n)) %>%
  filter(total > 100) %>%
  ungroup() %>%
  # n >= 1 from count(), so exists is simply a presence flag
  mutate(exists = if_else(n > 0, 1, 0)) %>%
  pivot_wider(id_cols = id, names_from = word,
              values_from = exists, values_fill = 0) %>%
  right_join(wine, by = "id") %>%
  replace(., is.na(.), 0) %>%
  mutate(log_price = log(price)) %>%
  select(-id, -price, -description)

names(wine)
##   [1] "acidity"       "crisp"         "fermented"     "flavors"      
##   [5] "green"         "pineapple"     "stainless"     "tart"         
##   [9] "bottling"      "earthy"        "herbal"        "tannic"       
##  [13] "aged"          "berry"         "chocolate"     "herb"         
##  [17] "hint"          "mix"           "months"        "oak"          
##  [21] "oregon"        "stem"          "finish"        "flavor"       
##  [25] "fruit"         "leaf"          "notes"         "sweet"        
##  [29] "touch"         "brings"        "tannins"       "fruity"       
##  [33] "lightly"       "pleasant"      "simple"        "strawberry"   
##  [37] "acids"         "cherry"        "forward"       "lingering"    
##  [41] "ripe"          "textural"      "acid"          "aromas"       
##  [45] "cranberry"     "light"         "barrels"       "spicy"        
##  [49] "age"           "bit"           "bottle"        "offers"       
##  [53] "sharp"         "tight"         "black"         "candy"        
##  [57] "cellar"        "depth"         "grapes"        "grown"        
##  [61] "red"           "smooth"        "supple"        "aging"        
##  [65] "clean"         "concentrated"  "dark"          "earth"        
##  [69] "fine"          "balance"       "berries"       "complex"      
##  [73] "elegant"       "estate"        "peach"         "selection"    
##  [77] "streak"        "tea"           "vintage"       "whiff"        
##  [81] "wild"          "wines"         "fruits"        "palate"       
##  [85] "2020"          "character"     "delicious"     "juicy"        
##  [89] "bright"        "carries"       "orange"        "tangy"        
##  [93] "citrus"        "mouthfeel"     "reserve"       "spent"        
##  [97] "time"          "balanced"      "blend"         "coffee"       
## [101] "alcohol"       "plenty"        "power"         "slightly"     
## [105] "ava"           "sourced"       "stone"         "vines"        
## [109] "hints"         "length"        "midpalate"     "peel"         
## [113] "rose"          "subtle"        "caramel"       "cola"         
## [117] "deep"          "streaks"       "barrel"        "cuvée"        
## [121] "dried"         "mouth"         "concentration" "rich"         
## [125] "round"         "winery's"      "complexity"    "excellent"    
## [129] "pinots"        "valley"        "grapefruit"    "minerality"   
## [133] "single"        "style"         "note"          "dry"          
## [137] "pear"          "fresh"         "cherries"      "lovely"       
## [141] "pepper"        "lemon"         "melon"         "apple"        
## [145] "cinnamon"      "pretty"        "french"        "spice"        
## [149] "vanilla"       "drinking"      "soft"          "highlights"   
## [153] "plum"          "texture"       "clone"         "effort"       
## [157] "focused"       "quickly"       "loaded"        "aromatic"     
## [161] "color"         "delicate"      "scents"        "toast"        
## [165] "mineral"       "lively"        "white"         "toasty"       
## [169] "baking"        "spices"        "lush"          "thin"         
## [173] "bitter"        "bodied"        "medium"        "firm"         
## [177] "nicely"        "vineyards"     "sugar"         "raspberry"    
## [181] "extra"         "willamette"    "pleasing"      "nose"         
## [185] "core"          "substantial"   "variety"       "points"       
## [189] "log_price"

Run the principal component analysis

# Re-run PCA on the wider feature set. As before, prcomp()'s argument is
# `scale.` (not `scale`), and TRUE is used instead of the reassignable T.
pr_wine <- prcomp(x = select(wine, -variety), center = TRUE, scale. = TRUE)
screeplot(pr_wine, type = "lines")

Find the features with the highest loadings per factor

# Variables whose absolute loading reaches 0.25 on any of the first four
# components
pr_wine$rotation %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  select(1:5) %>%
  filter(abs(PC1) >= 0.25 | abs(PC2) >= 0.25 | abs(PC3) >= 0.25 | abs(PC4) >= 0.25)
rowname PC1 PC2 PC3 PC4
months 0.1116653 0.2642817 0.2957780 -0.0009590
oak 0.0913025 0.3178890 0.3133270 0.0000240
valley 0.0461676 0.0195822 -0.0093578 0.2949542
french 0.1105282 0.2947821 0.2935521 -0.0207073
willamette 0.0474642 0.0211232 -0.0065160 0.2918651
points 0.2060537 0.2563044 -0.3258908 0.0331730
log_price 0.3548499 0.0535944 -0.0912909 0.0433292

Name and graph the factors

# Attach variety and keep/name the first four components of the larger PCA
prc <- pr_wine$x %>%
  as.data.frame() %>%
  bind_cols(select(wine, variety), .) %>%
  select(1:5) %>%
  rename(pricey = PC1,
         quality_french_oak = PC2,
         crappy_french_oak = PC3,
         willamette_valley = PC4)


# Score densities by variety: pricey (PC1) and willamette_valley (PC4)
prc %>%
  select(variety, pricey, willamette_valley) %>%
  pivot_longer(cols = -variety, names_to = "component", values_to = "loading") %>%
  ggplot(aes(x = loading, fill = variety)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ component)

# Score densities by variety: the two "french oak" components (PC2, PC3)
prc %>%
  select(variety, quality_french_oak, crappy_french_oak) %>%
  pivot_longer(cols = -variety, names_to = "component", values_to = "loading") %>%
  ggplot(aes(x = loading, fill = variety)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ component)

(Hard) Exercise

  1. Use the prc dataframe
  2. to create a stacked bar graph with loading on y-axis
  3. and the factors on x-axis

Use these 4 factors for prediction

library(caret)
# 10-fold CV naive Bayes on the four named components; model selection uses
# Kappa, which is more informative than accuracy given the Pinot-Noir-heavy
# class balance (prevalence ~0.68 in the output below)
fit <- train(variety ~ .,
             data = prc, 
             method = "naive_bayes",
             metric = "Kappa",
             trControl = trainControl(method = "cv"))

# NOTE(review): predictions are made on the training data itself, so this
# confusion matrix reports in-sample (optimistic) performance
confusionMatrix(predict(fit, prc),factor(prc$variety))
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Chardonnay Pinot_Gris Pinot_Noir Riesling Syrah
##   Chardonnay        161         47         22       10     3
##   Pinot_Gris        116        270         29      105     1
##   Pinot_Noir        179         72       2681       39   168
##   Riesling           26         49          5       70     0
##   Syrah               0          0          0        0     0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7851          
##                  95% CI : (0.7721, 0.7977)
##     No Information Rate : 0.6753          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5266          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Chardonnay Class: Pinot_Gris Class: Pinot_Noir
## Sensitivity                    0.33402           0.61644            0.9795
## Specificity                    0.97704           0.93057            0.6520
## Pos Pred Value                 0.66255           0.51823            0.8541
## Neg Pred Value                 0.91575           0.95243            0.9387
## Prevalence                     0.11892           0.10807            0.6753
## Detection Rate                 0.03972           0.06662            0.6615
## Detection Prevalence           0.05996           0.12855            0.7745
## Balanced Accuracy              0.65553           0.77350            0.8158
##                      Class: Riesling Class: Syrah
## Sensitivity                  0.31250      0.00000
## Specificity                  0.97911      1.00000
## Pos Pred Value               0.46667          NaN
## Neg Pred Value               0.96054      0.95756
## Prevalence                   0.05527      0.04244
## Detection Rate               0.01727      0.00000
## Detection Prevalence         0.03701      0.00000
## Balanced Accuracy            0.64580      0.50000

…Dang!!!

Topic Modeling

Latent Dirichlet allocation

Latent Dirichlet allocation is one of the most common algorithms for topic modeling. Without diving into the math behind the model, we can understand it as being guided by two principles.

  1. Every document is a mixture of topics. We imagine that each document may contain words from several topics in particular proportions. For example, in a two-topic model we could say “Document 1 is 90% topic A and 10% topic B, while Document 2 is 30% topic A and 70% topic B.”

  2. Every topic is a mixture of words. For example, we could imagine a two-topic model of American news, with one topic for “politics” and one for “entertainment.” The most common words in the politics topic might be “President”, “Congress”, and “government”, while the entertainment topic may be made up of words such as “movies”, “television”, and “actor”. Importantly, words can be shared between topics; a word like “budget” might appear in both equally.

See: https://www.tidytextmining.com/topicmodeling.html#latent-dirichlet-allocation

LDA is a mathematical method for estimating both of these at the same time: finding the mixture of words that is associated with each topic, while also determining the mixture of topics that describes each document.

Running a model

library(topicmodels)

# Document-term matrix: one row per review, one column per word, cell = count
wine_dtm <- read_rds("../resources/variety-project.rds") %>% rowid_to_column("id") %>% 
  unnest_tokens(word, description) %>%
  anti_join(stop_words) %>%
  filter(!(word %in% c("drink","vineyard","variety","price","points","wine","pinot","chardonnay","gris","noir","riesling","syrah"))) %>% 
  count(id,word) %>% 
  cast_dtm(id, word, n)

# Fit a 4-topic LDA model; the fixed seed makes the stochastic fit reproducible
wine_lda <- LDA(wine_dtm, k = 4, control = list(seed = 5004))
wine_lda
## A LDA_VEM topic model with 4 topics.

Word-topic probabilities

# beta: per-topic word probabilities (one row per topic-term pair)
topics <- tidy(wine_lda, matrix = "beta")
head(topics)
topic term beta
1 acidity 0.0029667
2 acidity 0.0042483
3 acidity 0.0040735
4 acidity 0.0067402
1 crisp 0.0000277
2 crisp 0.0003778
# Top 10 words per topic by beta. slice_max() supersedes top_n() and names
# the ordering column explicitly; like top_n(), it keeps ties by default.
top_terms <- topics %>%
  group_by(topic) %>%
  slice_max(beta, n = 10) %>%
  ungroup() %>%
  arrange(topic, -beta)

# reorder_within()/scale_x_reordered() sort the bars within each facet
top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  scale_x_reordered()

Document-topic probabilities

# gamma: per-document topic proportions (the values for one document sum to 1)
topics <- tidy(wine_lda, matrix = "gamma")
head(topics)
document topic gamma
1 1 0.2510006
2 1 0.2499304
3 1 0.2480522
4 1 0.2425154
5 1 0.2518187
6 1 0.2563809

What if we pivot wider?

wine <- read_rds("../resources/variety-project.rds") %>% rowid_to_column("id")

# One row per document with its four topic shares, labelled with variety.
# tidy() returns `document` as character, so convert it before joining;
# the join key is stated explicitly instead of relying on the by-message.
topics <- topics %>%
  pivot_wider(id_cols = document, names_from = topic,
              values_from = gamma, names_prefix = "topic_") %>%
  mutate(id = as.integer(document)) %>%
  left_join(select(wine, id, variety), by = "id") %>%
  select(-document, -id)

head(topics)
topic_1 topic_2 topic_3 topic_4 variety
0.2510006 0.2446511 0.2519079 0.2524405 Pinot_Gris
0.2499304 0.2480786 0.2499077 0.2520833 Pinot_Noir
0.2480522 0.2541088 0.2467133 0.2511257 Pinot_Noir
0.2425154 0.2499155 0.2567310 0.2508381 Pinot_Noir
0.2518187 0.2469421 0.2527943 0.2484449 Pinot_Noir
0.2563809 0.2480390 0.2455539 0.2500262 Pinot_Noir

Can we use topics to model an outcome?

# Same CV naive Bayes recipe, now using the four topic proportions as features
fit <- train(variety ~ .,
             data = topics, 
             method = "naive_bayes",
             metric = "Kappa",
             trControl = trainControl(method = "cv"))

# NOTE(review): in-sample confusion matrix again (predicting on training data)
confusionMatrix(predict(fit, topics),factor(topics$variety))
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Chardonnay Pinot_Gris Pinot_Noir Riesling Syrah
##   Chardonnay         83         31         86        6     3
##   Pinot_Gris          0          1          2        0     0
##   Pinot_Noir        399        406       2649      217   167
##   Riesling            0          0          0        1     0
##   Syrah               0          0          0        0     2
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6751          
##                  95% CI : (0.6604, 0.6895)
##     No Information Rate : 0.6753          
##     P-Value [Acc > NIR] : 0.5208          
##                                           
##                   Kappa : 0.0828          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Chardonnay Class: Pinot_Gris Class: Pinot_Noir
## Sensitivity                    0.17220         0.0022831            0.9678
## Specificity                    0.96472         0.9994467            0.0965
## Pos Pred Value                 0.39713         0.3333333            0.6902
## Neg Pred Value                 0.89620         0.8920988            0.5907
## Prevalence                     0.11892         0.1080681            0.6753
## Detection Rate                 0.02048         0.0002467            0.6536
## Detection Prevalence           0.05157         0.0007402            0.9470
## Balanced Accuracy              0.56846         0.5008649            0.5322
##                      Class: Riesling Class: Syrah
## Sensitivity                0.0044643    0.0116279
## Specificity                1.0000000    1.0000000
## Pos Pred Value             1.0000000    1.0000000
## Neg Pred Value             0.9449654    0.9580351
## Prevalence                 0.0552677    0.0424377
## Detection Rate             0.0002467    0.0004935
## Detection Prevalence       0.0002467    0.0004935
## Balanced Accuracy          0.5022321    0.5058140

What if we used more topics?

# Refit LDA with 20 topics (same seed) and rebuild the document-topic table
wine_lda <- LDA(wine_dtm, k = 20, control = list(seed = 5004))

# As before: widen gamma to one column per topic, convert the character
# `document` id, and join the variety label on an explicit key.
topics <- tidy(wine_lda, matrix = "gamma") %>%
  pivot_wider(id_cols = document, names_from = topic,
              values_from = gamma, names_prefix = "topic_") %>%
  mutate(id = as.integer(document)) %>%
  left_join(select(wine, id, variety), by = "id") %>%
  select(-document, -id)

# CV naive Bayes on the 20 topic shares
fit <- train(variety ~ .,
             data = topics, 
             method = "naive_bayes",
             metric = "Kappa",
             trControl = trainControl(method = "cv"))

# In-sample evaluation (predicting on the data the model was trained on)
confusionMatrix(predict(fit, topics),factor(topics$variety))
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Chardonnay Pinot_Gris Pinot_Noir Riesling Syrah
##   Chardonnay        164         26         55       13     3
##   Pinot_Gris         72        212         93       51     5
##   Pinot_Noir        232        183       2549       89   133
##   Riesling           14         16         36       69     1
##   Syrah               0          1          4        2    30
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7461          
##                  95% CI : (0.7324, 0.7595)
##     No Information Rate : 0.6753          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4329          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: Chardonnay Class: Pinot_Gris Class: Pinot_Noir
## Sensitivity                    0.34025           0.48402            0.9313
## Specificity                    0.97284           0.93887            0.5160
## Pos Pred Value                 0.62835           0.48961            0.8001
## Neg Pred Value                 0.91614           0.93757            0.7832
## Prevalence                     0.11892           0.10807            0.6753
## Detection Rate                 0.04046           0.05231            0.6289
## Detection Prevalence           0.06440           0.10683            0.7861
## Balanced Accuracy              0.65654           0.71144            0.7236
##                      Class: Riesling Class: Syrah
## Sensitivity                  0.30804     0.174419
## Specificity                  0.98250     0.998196
## Pos Pred Value               0.50735     0.810811
## Neg Pred Value               0.96043     0.964641
## Prevalence                   0.05527     0.042438
## Detection Rate               0.01702     0.007402
## Detection Prevalence         0.03356     0.009129
## Balanced Accuracy            0.64527     0.586307

What if we allow price and points back in?

# Keep the 20 topic shares and add back points and log price as predictors.
# `id` is the only column shared with wine, so joining on it explicitly is
# equivalent to the original implicit join but self-documenting.
topics <- tidy(wine_lda, matrix = "gamma") %>%
  pivot_wider(id_cols = document, names_from = topic,
              values_from = gamma, names_prefix = "topic_") %>%
  mutate(id = as.integer(document)) %>%
  left_join(wine, by = "id") %>%
  mutate(log_price = log(price)) %>%
  select(-document, -id, -description, -price)

fit <- train(variety ~ .,
             data = topics, 
             method = "naive_bayes",
             metric = "Kappa",
             trControl = trainControl(method = "cv"))

# In-sample confusion matrix
confusionMatrix(predict(fit, topics),factor(topics$variety))
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   Chardonnay Pinot_Gris Pinot_Noir Riesling Syrah
##   Chardonnay        176         10         47       12     3
##   Pinot_Gris         81        352         98       73     8
##   Pinot_Noir        214         55       2572       48   126
##   Riesling           11         21         15       90     1
##   Syrah               0          0          5        1    34
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7955          
##                  95% CI : (0.7827, 0.8078)
##     No Information Rate : 0.6753          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5664          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: Chardonnay Class: Pinot_Gris Class: Pinot_Noir
## Sensitivity                    0.36515           0.80365            0.9397
## Specificity                    0.97984           0.92808            0.6634
## Pos Pred Value                 0.70968           0.57516            0.8531
## Neg Pred Value                 0.91958           0.97501            0.8410
## Prevalence                     0.11892           0.10807            0.6753
## Detection Rate                 0.04342           0.08685            0.6346
## Detection Prevalence           0.06119           0.15100            0.7439
## Balanced Accuracy              0.67249           0.86587            0.8015
##                      Class: Riesling Class: Syrah
## Sensitivity                  0.40179     0.197674
## Specificity                  0.98746     0.998454
## Pos Pred Value               0.65217     0.850000
## Neg Pred Value               0.96577     0.965612
## Prevalence                   0.05527     0.042438
## Detection Rate               0.02221     0.008389
## Detection Prevalence         0.03405     0.009869
## Balanced Accuracy            0.69462     0.598064

Breakout Sessions